# PSRM Validation Nov 2025
sessionInfo() # package version
#R version 4.3.3 (2024-02-29 ucrt)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 11 x64 (build 26200)


library(broom)
library(boot)
library(dplyr)
library(extrafont)
library(ggplot2)
library(lmtest)
library(lsa)
library(LSX)
library(lubridate)
library(MASS)
library(nnet)
library(pscl)
library(quanteda)
library(quanteda.textmodels)
library(quanteda.textplots)
library(reshape)
library(reshape2)
library(texreg)
library(tidyverse)
library(tmcn)
library(topicmodels)
library(word2vec)
library(xtable)
library(xts)
library(zoo)

# Loading the data --------------------------------------------------------

tw         <- readRDS("tw_restricted_validation_nov_2025.Rds")  ## Taiwan dataframe
tw_dfm     <- readRDS("tw_dfm_validation_nov_2025.Rds") ## pre-tokenized dfm
tw_dfm_w   <- readRDS("tw_dfm_w_valdiation_nov_2025.Rds") ## pre-tokenized dfm (weighted)
thai        <- readRDS("thai_validation_nov_2025.Rds")  ## Thai dataframe

long_data <- tw %>%
  pivot_longer(
    cols = c(pk_neg_wt, ntu_neg_w),
    names_to = "variable",
    values_to = "weight",
    names_pattern = "(.*)_neg", # Use a regular expression to extract the prefix
    names_transform = list(variable = function(x) {
      case_when(
        x == "pk" ~ "Tsinghua University Negative Sentiment",
        x == "ntu" ~ "NTU Negative Sentiment"
      )
    })
  )

correlations <- long_data %>%
  group_by(variable) %>%
  dplyr::summarize(cor = cor(weight, implicit_threat, use = "complete.obs"))



# Correlation test -- Appendix, Figure 1, p.2 --------------------------------------------------



correlation_plot <-ggplot(long_data, aes(x = weight, y = implicit_threat)) +
  geom_point() +  # Add points
  geom_smooth(method = "lm", se = FALSE, color = "blue") +  # Add linear regression line
  facet_wrap(~ variable, scales = "free_x") +  # Create facets for each weight type
  geom_label(data = correlations, aes(x = Inf, y = Inf, label = sprintf("Cor: %.2f", cor)),
             hjust = 1.1, vjust = 1.1, color = "red") +  # Add correlation labels
  labs(x = "Negative Sentiment", y = "Implicit Threat Sentiment")+
  theme_minimal()


ggsave("Appendix_Figure_1.png", correlation_plot, width = 6, height = 4, dpi = 300)




# Placebo test -- Appendix, Figure 2, p.3 -----------------------------------------------------------------

thai_plot <-ggplot(thai, aes(x = date, y = implicit_threat)) +
  geom_point()+
  geom_smooth(method = loess) + # Adds a smooth trend line
  theme_classic() + # Uses the classic theme
  labs(
    x = "Year",
    y = "Implicit Threat Score"
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") + # Set x-axis breaks to every year
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


ggsave("Appendix_Figure_2.png", thai_plot, width = 6, height = 4, dpi = 300)



# Predictive Validity Test - Appendix, Table 3,4 and Figure 3 pp.4-6 -----------------------------------------------------
K <- 10
set.seed(1)
lda <- LDA(tw_dfm, k = K, method = "Gibbs", 
           control = list(verbose=25L, seed = 123,
                          burnin = 100, iter = 500))

terms <- get_terms(lda, 10)
topics <- get_topics(lda, 1)



tw$name_topic <- NA
tw$name_topic[topics==1] <- "China's Econ Exchange"
tw$name_topic[topics==2] <- "China's Poli Exchange"
tw$name_topic[topics==3] <- "China's Econ Exchange"
tw$name_topic[topics==4] <- "DPP & US"
tw$name_topic[topics==5] <- "DPP & US"
tw$name_topic[topics==6] <- "China's Poli Exchange"
tw$name_topic[topics==7] <- "DPP & US"
tw$name_topic[topics==8] <- "DPP & US"
tw$name_topic[topics==9] <- "China's Poli Exchange"
tw$name_topic[topics==10] <- "China's Cult Exchange"




table(tw$name_topic)
tw$name_topic <- as.factor(tw$name_topic)

multinom_model <- multinom(name_topic ~ implicit_threat , data = tw)

# Appendix Table 3 Regression Results p.4
screenreg(multinom_model)

texreg(multinom_model,file = "Appendix_Table_3.txt")




# Convert predicted probabilities into a data frame for plotting
tw$predicted_probs <- predict(multinom_model, type = "probs")
predicted_df <- as.data.frame(tw$predicted_probs)
predicted_df$implicit_threat <- tw$implicit_threat
predicted_df$actual_category <- tw$name_topic
predicted_df_long <- predicted_df %>%
  gather(key = "category", value = "probability", -implicit_threat, -actual_category)

# Appendix Figure 3 p.5
predictive_test_plot<-ggplot(predicted_df_long, aes(x = implicit_threat, y = probability, color = category)) +
  geom_line(size = 1.5) +
  labs(
    # title = "Predicted Probabilities by Category",
    x = "Implicit Threat Score",
    y = "Predicted Probability",
    color = "Category"
  ) +
  theme_minimal()

ggsave("Appendix_Figure_3.png", predictive_test_plot, width = 6, height = 4, dpi = 300)



# Classified Features Appendix Table 4 p.6 
latex_table <- xtable(terms) 
write.csv(terms, "Appendix_Table_4.csv") 



